//*************************************************************************************************
//
//	Description:
//		VSMBlur.fx
//
//	<P> Copyright (c) 2008 Blimey! Games Ltd. All rights reserved.
//
//	Author: 
//		Alastair Murray
//
//	History:
//
//	<TABLE>
//		\Author         Date        Version       Description
//		--------        -----       --------      ------------
//		AMurray		    27/03/2008  0.1           Created
//	<TABLE>
//
//*************************************************************************************************

#include "stddefs.fxh"


// Source texture and sampler read by CopyDepthToVariancePS, HorizBlurPS and
// PreviewShadowMapPS.
// FX_SAMPLERSTATE_LINEAR_TEXTURE and SET_NO_ANISOTROPY are macros defined outside
// this file (presumably in stddefs.fxh); their expansion is not visible here, so
// the order of the state assignments below should be left alone.
texture inputPTex : TEXTURE;
sampler inputPSampler : SAMPLER = sampler_state
{
	FX_SAMPLERSTATE_LINEAR_TEXTURE
	Texture = < inputPTex >;
	// Clamp in both directions so blur taps past the edge repeat the border texel
	AddressU  = Clamp;
	AddressV  = Clamp;
	// Point filtering was used previously; kept for reference
	//MinFilter = Point;
	//MagFilter = Point;
	MinFilter = Linear;
	MagFilter = Linear;
#ifdef _XBOX
	// HorizBlurPS samples this with tex3D on Xbox (array texture);
	// NOTE(review): point filtering in Z presumably avoids blending between slices - confirm
	MinFilterZ = Point;
	MagFilterZ = Point;
#endif
	SET_NO_ANISOTROPY
	// No mip filtering
	MipFilter = None;
};

// Source texture and sampler read by VertBlurPS (the second blur pass).
// FX_SAMPLERSTATE_LINEAR_TEXTURE and SET_NO_ANISOTROPY are macros defined outside
// this file (presumably in stddefs.fxh); their expansion is not visible here, so
// the order of the state assignments below should be left alone.
texture inputLTex : TEXTURE;
sampler inputLSampler : SAMPLER = sampler_state
{
	FX_SAMPLERSTATE_LINEAR_TEXTURE
	Texture = < inputLTex >;
	// Clamp in both directions so blur taps past the edge repeat the border texel
	AddressU  = Clamp;
	AddressV  = Clamp;
	// Linear min/mag: VertBlurPS relies on filtered fetches (see its header comment)
	MinFilter = Linear;
	MagFilter = Linear;
#ifdef _XBOX
	// NOTE(review): Z filter states only matter if this is bound to an array/volume
	// texture; VertBlurPS samples it with tex2D - confirm whether these are needed
	MinFilterZ = Point;
	MagFilterZ = Point;
#endif
	SET_NO_ANISOTROPY
	// No mip filtering
	MipFilter = None;
};


// Vertex input shared by VSMBlurVS and PreviewShadowMapVS.
struct VSINPUT
{
	// Used directly as the output position (w is forced to 1.0) before the
	// per-shader y (and, for the preview, xy) scale/offset is applied
	float3 position : POSITION;
	// Only y is modified by the vertex shaders (scale + offset); x passes through
	float2 texCoord : TEXCOORD0;
};

// Vertex output shared by VSMBlurVS and PreviewShadowMapVS.
struct VSOUTPUT
{
	float4 position : POSITION;
#ifdef _XBOX
	// xy = texture coordinate, z = array_texture_depth (third tex3D coordinate
	// in HorizBlurPS)
	float3 texCoord : TEXCOORD0;
#else
	float2 texCoord : TEXCOORD0;
#endif
};

// Packed scale/offset applied to the output position. Component meaning differs
// per vertex shader:
//   VSMBlurVS          : .y = scale and .x = offset applied to y while it is in
//                        0..1 space; .zw is subtracted from xy on non-Xbox builds
//                        (half-pixel adjust)
//   PreviewShadowMapVS : .xy = scale, .zw = offset (y in 0..1 space, x unmapped)
float4	offset_scale_vertices;
// .y = scale, .x = offset applied to texCoord.y by both vertex shaders
float2	offset_scale_texcoords;

#ifdef _XBOX
// Written to texCoord.z by both vertex shaders; HorizBlurPS passes it as the
// third tex3D coordinate
float	array_texture_depth;
#endif

// Sample offsets added to the texture coordinate; only .xy is read.
//   [0],[1],[3],[4] : HorizBlurPS taps ([2] is never read - the centre tap uses
//                     the unmodified texCoord)
//   [5]..[8]        : VertBlurPS taps
// NOTE(review): the block below is commented out and uses float2 initialisers for
// a float4 array; it looks like example values for a 1024-texel map. The real
// values are presumably set by the application - confirm.
float4	offsets[9];
/*
{
	float2( -2.0f/1024.0f, 0.0f ),
	float2( -1.0f/1024.0f, 0.0f ),
	float2( 0.0f, 0.0f ),
	float2( 1.0f/1024.0f, 0.0f ),
	float2( 2.0f/1024.0f, 0.0f ),
	float2( 0.0f, -1.5f/1024.0f ),
	float2( 0.0f, -0.5f/1024.0f ),
	float2( 0.0f, 0.5f/1024.0f ),
	float2( 0.0f, 1.5f/1024.0f ),
};*/

//
// Vertex shader shared by the CopyDepthToVariance, HorizBlur and VertBlur techniques.
//
// Passes the input position through with w = 1, remapping only y:
//   flip sign -> map -1..1 to 0..1 -> scale by offset_scale_vertices.y and
//   offset by offset_scale_vertices.x -> map back to -1..1 -> flip sign again.
// texCoord.y receives the matching scale/offset from offset_scale_texcoords (.y scale, .x offset).
//
VSOUTPUT VSMBlurVS( VSINPUT _input )
{
	VSOUTPUT result;

	// Vertical remap, performed in a 0..1 top-down space
	float flippedY = _input.position.y * -1.0f;
	float unitY    = ( flippedY + 1.0f ) * 0.5f;
	unitY = unitY * offset_scale_vertices.y;
	unitY = unitY + offset_scale_vertices.x;
	float clipY = ( ( unitY * 2.0f ) - 1.0f ) * -1.0f;

	result.position = float4( _input.position.x, clipY, _input.position.z, 1.0f );

#ifndef _XBOX
	// adjust for half pixel
	result.position.xy -= offset_scale_vertices.zw;
#endif

	// Texture coordinate: x untouched, y scaled then offset
	float scaledV = _input.texCoord.y * offset_scale_texcoords.y;
	result.texCoord.xy = float2( _input.texCoord.x, scaledV + offset_scale_texcoords.x );

#ifdef _XBOX
	// Third coordinate for tex3D fetches in the pixel shader
	result.texCoord.z = array_texture_depth;
#endif

	return result;
}

//
// Reads the red channel of inputPSampler as depth d and outputs ( d, d*d, 0, 0 ).
// On Xbox the two written values are remapped with (x*64)-32, which sends 0 to -32
// and 1 to +32.
//
float4	CopyDepthToVariancePS( float2 texCoord : TEXCOORD0 ) : COLOR0
{
	float	depth   = tex2D( inputPSampler, texCoord ).r;
	float2	moments = float2( depth, depth*depth );

#ifdef _XBOX
	// map to -32 .. +32 to cover entire range
	moments = (moments*64.0f)-32.0f;
#endif

	return float4( moments, 0.0f, 0.0f );
}

//
// First pass of the blur: 5 taps from inputPSampler weighted [ 1 4 6 4 1 ] / 16.
//
// Tap positions are texCoord + offsets[0], [1], (none), [3], [4]; offsets[2] is not
// read because the centre tap uses texCoord unmodified.
//
// Returns ( z, z2, 0, 0 ):
//   non-Xbox : z  = weighted sum of the .r channel,
//              z2 = weighted sum of the .g channel (blurs the stored z2 values).
//   Xbox     : input is an array texture sampled with tex3D (texCoord.z selects the
//              third coordinate). Only .r is read; samples are remapped from
//              -1..1 to 0..1, z2 is rebuilt as the weighted sum of the squared
//              samples, and both outputs are rescaled with (x*64)-32.
//
#ifdef _XBOX
float4	HorizBlurPS( float3 texCoord : TEXCOORD0 ) : COLOR0
#else
float4	HorizBlurPS( float2 texCoord : TEXCOORD0 ) : COLOR0
#endif
{
	// Gaussian kernel [ 1 4 6 4 1 ] / 16, split as four dot-product weights plus the last tap
	const float4 kWeights0123 = float4( 1.0/16, 4.0/16, 6.0/16, 4.0/16 );
	const float  kWeight4     = ( 1.0 / 16 );

#ifdef _XBOX
	// we're sampling from an array texture here
	// (a tfetch3D asm variant using OffsetX = -2..+2 with point filtering was
	//  sketched for this path previously but never enabled)
	float4 z0123;
	float  z4;
	z0123.x = tex3D( inputPSampler, float3(texCoord.xy+offsets[0].xy,texCoord.z) ).r;
	z0123.y = tex3D( inputPSampler, float3(texCoord.xy+offsets[1].xy,texCoord.z) ).r;
	z0123.z = tex3D( inputPSampler, texCoord ).r;
	z0123.w = tex3D( inputPSampler, float3(texCoord.xy+offsets[3].xy,texCoord.z) ).r;
	z4      = tex3D( inputPSampler, float3(texCoord.xy+offsets[4].xy,texCoord.z) ).r;

	// scale from -1 -> 1  to  0 -> 1
	z0123 = (z0123+1.0f)*0.5f;
	z4    = (z4+1.0f)*0.5f;

	float z = dot( z0123, kWeights0123 ) + z4 * kWeight4;

	// rebuild z2 from filtering z   TODO: is this really best for Xbox?
	float4 sq0123 = z0123 * z0123;
	float  sq4    = z4 * z4;
	float z2 = dot( sq0123, kWeights0123 ) + sq4 * kWeight4;

	// need to scale from -32 -> +32
	z  = (z*64.0f)-32.0f;
	z2 = (z2*64.0f)-32.0f;
#else
	// One fetch per tap, reading both channels at once (.r = z, .g = z2).
	// The original code fetched every tap twice, once per channel.
	float2 tap0 = tex2D( inputPSampler, texCoord+offsets[0].xy ).rg;
	float2 tap1 = tex2D( inputPSampler, texCoord+offsets[1].xy ).rg;
	float2 tap2 = tex2D( inputPSampler, texCoord ).rg;
	float2 tap3 = tex2D( inputPSampler, texCoord+offsets[3].xy ).rg;
	float2 tap4 = tex2D( inputPSampler, texCoord+offsets[4].xy ).rg;

	// blur existing z and z2 values
	float z  = dot( float4( tap0.x, tap1.x, tap2.x, tap3.x ), kWeights0123 ) + tap4.x * kWeight4;
	float z2 = dot( float4( tap0.y, tap1.y, tap2.y, tap3.y ), kWeights0123 ) + tap4.y * kWeight4;
#endif

	return float4( z, z2, 0, 0 );
}

//
// Second pass of the separable blur: 4 taps from inputLSampler at
// texCoord + offsets[5..8], weighted [ 2 6 6 2 ] / 16.
//
// Note that this second pass of the separable filter can use filtered fetches:
// the 4 samples filter across a column of 5 pixels.
// NOTE(review): that only holds if offsets[5..8] sit half-way between texels
// (e.g. +/-0.5 and +/-1.5 texels) - the values are supplied at runtime, confirm.
//
// Returns ( z, z2, 0, 0 ) where z blurs .r and z2 blurs .g. On Xbox the samples
// are first remapped from -1..1 to 0..1 and the results rescaled with (x*64)-32.
//
float4	VertBlurPS( float2 texCoord : TEXCOORD0 ) : COLOR0
{
	// The fetches are the same on every platform; each tap carries ( z, z2 ) in .rg
	float2 tapA = tex2D( inputLSampler, texCoord+offsets[5].xy ).rg;
	float2 tapB = tex2D( inputLSampler, texCoord+offsets[6].xy ).rg;
	float2 tapC = tex2D( inputLSampler, texCoord+offsets[7].xy ).rg;
	float2 tapD = tex2D( inputLSampler, texCoord+offsets[8].xy ).rg;

#ifdef _XBOX
	// scale from -1 -> 1  to  0 -> 1
	tapA = (tapA+1.0f)*0.5f;
	tapB = (tapB+1.0f)*0.5f;
	tapC = (tapC+1.0f)*0.5f;
	tapD = (tapD+1.0f)*0.5f;
#endif

	// Sum results with Gaussian weights
	const float4 tapWeights = float4( 2.0/16, 6.0/16, 6.0/16, 2.0/16 );
	float z  = dot( float4( tapA.x, tapB.x, tapC.x, tapD.x ), tapWeights );
	float z2 = dot( float4( tapA.y, tapB.y, tapC.y, tapD.y ), tapWeights );

#ifdef _XBOX
	// need to scale from -32 -> +32
	z  = (z*64.0f)-32.0f;
	z2 = (z2*64.0f)-32.0f;
#endif

	return float4( z, z2, 0, 0 );
}




//
// Vertex shader for the PreviewShadowMap technique.
//
// Position: y is flipped and mapped from -1..1 to 0..1, then xy is scaled by
// offset_scale_vertices.xy and offset by offset_scale_vertices.zw (x is scaled in
// its original space, y in 0..1 space), and finally y is mapped back to -1..1 and
// flipped again. z passes through and w is set to 1.
// texCoord.y is scaled by offset_scale_texcoords.y and offset by offset_scale_texcoords.x.
//
VSOUTPUT PreviewShadowMapVS( VSINPUT _input )
{
	VSOUTPUT result;

	float2 quadXY = _input.position.xy;

	// y: flip, then -1..1 -> 0..1
	quadXY.y = ( ( quadXY.y * -1.0f ) + 1.0f ) * 0.5f;

	// place the quad: scale first, then offset
	quadXY = quadXY * offset_scale_vertices.xy;
	quadXY = quadXY + offset_scale_vertices.zw;

	// y: 0..1 -> -1..1, then flip back
	quadXY.y = ( ( quadXY.y * 2.0f ) - 1.0f ) * -1.0f;

	result.position = float4( quadXY, _input.position.z, 1.0f );

	// Texture coordinate: x untouched, y scaled then offset
	float scaledV = _input.texCoord.y * offset_scale_texcoords.y;
	result.texCoord.xy = float2( _input.texCoord.x, scaledV + offset_scale_texcoords.x );

#ifdef _XBOX
	result.texCoord.z = array_texture_depth;
#endif

	return result;
}


//
// Preview pixel shader: samples inputPSampler, squares the red channel and
// writes it to r, g and b with alpha = 1.
//
float4	PreviewShadowMapPS( float2 texCoord : TEXCOORD0 ) : COLOR0
{
	float depth = tex2D( inputPSampler, texCoord ).r;

	// the displayed value is depth squared
	float shade = depth * depth;

	return float4( shade, shade, shade, 1.0f );
}





// Single pass running VSMBlurVS + CopyDepthToVariancePS, which writes
// ( d, d*d ) from the red channel of inputPSampler.
// Depth test/write, alpha blending, alpha test and culling are all disabled.
technique CopyDepthToVariance
{
	pass Pass0
	{
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		// PS3 build: different cull state name and the sce_*_rsx compile profiles
		CullFaceEnable=false;
		VertexShader = compile sce_vp_rsx VSMBlurVS();
		PixelShader = compile sce_fp_rsx CopyDepthToVariancePS();
#else		
		// All other builds: shader model 3.0
		CullMode = None;
		VertexShader = compile vs_3_0 VSMBlurVS();
		PixelShader = compile ps_3_0 CopyDepthToVariancePS();
#endif
	}
}

// Single pass running VSMBlurVS + HorizBlurPS (5-tap blur reading inputPSampler
// with offsets[0..4]).
// Depth test/write, alpha blending, alpha test and culling are all disabled.
technique HorizBlur
{
	pass Pass0
	{
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		// PS3 build: different cull state name and the sce_*_rsx compile profiles
		CullFaceEnable=false;
		VertexShader = compile sce_vp_rsx VSMBlurVS();
		PixelShader = compile sce_fp_rsx HorizBlurPS();
#else		
		// All other builds: shader model 3.0
		CullMode = None;
		VertexShader = compile vs_3_0 VSMBlurVS();
		PixelShader = compile ps_3_0 HorizBlurPS();
#endif
	}
}

// Single pass running VSMBlurVS + VertBlurPS (4-tap blur reading inputLSampler
// with offsets[5..8]).
// Depth test/write, alpha blending, alpha test and culling are all disabled.
technique VertBlur
{
	pass Pass0
	{
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		// PS3 build: different cull state name and the sce_*_rsx compile profiles
		CullFaceEnable=false;
		VertexShader = compile sce_vp_rsx VSMBlurVS();
		PixelShader = compile sce_fp_rsx VertBlurPS();
#else		
		// All other builds: shader model 3.0
		CullMode = None;
		VertexShader = compile vs_3_0 VSMBlurVS();
		PixelShader = compile ps_3_0 VertBlurPS();
#endif
	}
}

// Single pass running PreviewShadowMapVS + PreviewShadowMapPS, which outputs the
// squared red channel of inputPSampler as an opaque grey value.
// Depth test/write, alpha blending, alpha test and culling are all disabled.
technique PreviewShadowMap
{
	pass Pass0
	{
		ZEnable = 0;
		ZWriteEnable = false;
		AlphaBlendEnable = false;
		AlphaTestEnable = false;
#ifdef _PS3_
		// PS3 build: different cull state name and the sce_*_rsx compile profiles
		CullFaceEnable=false;
		VertexShader = compile sce_vp_rsx PreviewShadowMapVS();
		PixelShader = compile sce_fp_rsx PreviewShadowMapPS();
#else		
		// All other builds: shader model 3.0
		CullMode = None;
		VertexShader = compile vs_3_0 PreviewShadowMapVS();
		PixelShader = compile ps_3_0 PreviewShadowMapPS();
#endif
	}
}
